{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Where Filtering\n",
    "This notebook demonstrates how to use where filtering to filter the data returned from get or query."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-08-11T18:29:08.687703Z",
     "start_time": "2023-08-11T18:29:07.757276Z"
    }
   },
   "outputs": [],
   "source": [
    "import chromadb"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "client = chromadb.Client()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create a new chroma collection\n",
    "collection_name = \"filter_example_collection\"\n",
    "collection = client.create_collection(name=collection_name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Add some data to the collection\n",
    "collection.add(\n",
    "    embeddings=[\n",
    "        [1.1, 2.3, 3.2],\n",
    "        [4.5, 6.9, 4.4],\n",
    "        [1.1, 2.3, 3.2],\n",
    "        [4.5, 6.9, 4.4],\n",
    "        [1.1, 2.3, 3.2],\n",
    "        [4.5, 6.9, 4.4],\n",
    "        [1.1, 2.3, 3.2],\n",
    "        [4.5, 6.9, 4.4],\n",
    "    ],\n",
    "    metadatas=[\n",
    "        {\"status\": \"read\"},\n",
    "        {\"status\": \"unread\"},\n",
    "        {\"status\": \"read\"},\n",
    "        {\"status\": \"unread\"},\n",
    "        {\"status\": \"read\"},\n",
    "        {\"status\": \"unread\"},\n",
    "        {\"status\": \"read\"},\n",
    "        {\"status\": \"unread\"},\n",
    "    ],\n",
    "    documents=[\"A document that discusses domestic policy\", \"A document that discusses international affairs\", \"A document that discusses kittens\", \"A document that discusses dogs\", \"A document that discusses chocolate\", \"A document that is sixth that discusses government\", \"A document that discusses international affairs\", \"A document that discusses global affairs\"],\n",
    "    ids=[\"id1\", \"id2\", \"id3\", \"id4\", \"id5\", \"id6\", \"id7\", \"id8\"],\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'ids': ['id7'],\n",
       " 'embeddings': None,\n",
       " 'metadatas': [{'status': 'read'}],\n",
       " 'documents': ['A document that discusses international affairs']}"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Get documents that are read and about affairs\n",
    "collection.get(where={\"status\": \"read\"}, where_document={\"$contains\": \"affairs\"})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'ids': ['id1', 'id8'],\n",
       " 'embeddings': None,\n",
       " 'metadatas': [{'status': 'read'}, {'status': 'unread'}],\n",
       " 'documents': ['A document that discusses domestic policy',\n",
       "  'A document that discusses global affairs']}"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Get documents that are about global affairs or domestic policy\n",
    "collection.get(where_document={\"$or\": [{\"$contains\": \"global affairs\"}, {\"$contains\": \"domestic policy\"}]})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'ids': [['id7', 'id2', 'id8']],\n",
       " 'distances': [[16.740001678466797, 87.22000122070312, 87.22000122070312]],\n",
       " 'metadatas': [[{'status': 'read'},\n",
       "   {'status': 'unread'},\n",
       "   {'status': 'unread'}]],\n",
       " 'embeddings': None,\n",
       " 'documents': [['A document that discusses international affairs',\n",
       "   'A document that discusses international affairs',\n",
       "   'A document that discusses global affairs']]}"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Get 5 closest vectors to [0, 0, 0] that are about affairs\n",
    "# Outputs 3 docs because collection only has 3 docs about affairs\n",
    "collection.query(query_embeddings=[[0, 0, 0]], where_document={\"$contains\": \"affairs\"}, n_results=5)"
   ]
  },
  {
   "cell_type": "markdown",
   "source": [
    "# Where Filtering With Logical Operators\n",
    "This section demonstrates how one can use the logical operators in `where` filtering.\n",
    "\n",
    "Chroma currently supports: `$and` and `$or`operators.\n",
    "\n",
    "> Note: Logical operators can be nested"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/tazarov/.cache/chroma/onnx_models/all-MiniLM-L6-v2/onnx.tar.gz: 100%|██████████| 79.3M/79.3M [02:59<00:00, 463kiB/s] \n"
     ]
    },
    {
     "data": {
      "text/plain": "{'ids': ['1', '2'],\n 'embeddings': None,\n 'metadatas': [{'author': 'john'}, {'author': 'jack'}],\n 'documents': ['Article by john', 'Article by Jack']}"
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Or Logical Operator Filtering\n",
    "# import chromadb\n",
    "client = chromadb.Client()\n",
    "collection = client.get_or_create_collection(\"test-where-list\")\n",
    "collection.add(documents=[\"Article by john\", \"Article by Jack\", \"Article by Jill\"],\n",
    "               metadatas=[{\"author\": \"john\"}, {\"author\": \"jack\"}, {\"author\": \"jill\"}], ids=[\"1\", \"2\", \"3\"])\n",
    "\n",
    "collection.get(where={\"$or\": [{\"author\": \"john\"}, {\"author\": \"jack\"}]})\n"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-08-11T18:45:52.663345Z",
     "start_time": "2023-08-11T18:42:50.970414Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "outputs": [
    {
     "data": {
      "text/plain": "{'ids': ['1'],\n 'embeddings': None,\n 'metadatas': [{'author': 'john', 'category': 'chroma'}],\n 'documents': ['Article by john']}"
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# And Logical Operator Filtering\n",
    "collection = client.get_or_create_collection(\"test-where-list\")\n",
    "collection.upsert(documents=[\"Article by john\", \"Article by Jack\", \"Article by Jill\"],\n",
    "               metadatas=[{\"author\": \"john\",\"category\":\"chroma\"}, {\"author\": \"jack\",\"category\":\"ml\"}, {\"author\": \"jill\",\"category\":\"lifestyle\"}], ids=[\"1\", \"2\", \"3\"])\n",
    "collection.get(where={\"$and\": [{\"category\": \"chroma\"}, {\"author\": \"john\"}]})"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-08-11T18:49:31.174811Z",
     "start_time": "2023-08-11T18:49:31.056618Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "outputs": [
    {
     "data": {
      "text/plain": "{'ids': [], 'embeddings': None, 'metadatas': [], 'documents': []}"
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# And logical that doesn't match anything\n",
    "collection.get(where={\"$and\": [{\"category\": \"chroma\"}, {\"author\": \"jill\"}]})"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-08-11T18:49:35.758816Z",
     "start_time": "2023-08-11T18:49:35.741477Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "outputs": [
    {
     "data": {
      "text/plain": "{'ids': ['1'],\n 'embeddings': None,\n 'metadatas': [{'author': 'john', 'category': 'chroma'}],\n 'documents': ['Article by john']}"
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Combined And and Or Logical Operator Filtering\n",
    "collection.get(where={\"$and\": [{\"category\": \"chroma\"}, {\"$or\": [{\"author\": \"john\"}, {\"author\": \"jack\"}]}]})"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-08-11T18:49:40.463045Z",
     "start_time": "2023-08-11T18:49:40.450240Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "outputs": [
    {
     "data": {
      "text/plain": "{'ids': ['1'],\n 'embeddings': None,\n 'metadatas': [{'author': 'john', 'category': 'chroma'}],\n 'documents': ['Article by john']}"
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "collection.get(where_document={\"$contains\": \"Article\"},where={\"$and\": [{\"category\": \"chroma\"}, {\"$or\": [{\"author\": \"john\"}, {\"author\": \"jack\"}]}]})"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-08-11T18:51:12.328062Z",
     "start_time": "2023-08-11T18:51:12.315943Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [],
   "metadata": {
    "collapsed": false
   }
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.8"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "2395417914bce3169eff793a7d01bf858f95b138000d8d354eed93ead856f5e6"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
